import os
import random
import shutil
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
import yaml
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import datasets, models, transforms
from torchvision.utils import save_image
import pdb
import sys
sys.path.append('..')
from template import utils, utils_v2
transforms.functional.resize
<function torchvision.transforms.functional.resize(img: torch.Tensor, size: List[int], interpolation: torchvision.transforms.functional.InterpolationMode = <InterpolationMode.BILINEAR: 'bilinear'>, max_size: Optional[int] = None, antialias: Union[str, bool, NoneType] = 'warn') -> torch.Tensor>
utils_v2.save_model
<function template.utils_v2.save_model(model, optimizer, epoch, stats, margin)>
# Project helper (presumably seeds the RNGs in use -- see template.utils),
# followed by explicitly allowing non-deterministic kernels.
utils.set_random_seeds()
torch.use_deterministic_algorithms(False)

# Load the run configuration. The context manager closes the file handle
# instead of leaving it open for the lifetime of the kernel.
with open("config.yaml") as config_file:
    config = yaml.safe_load(config_file)
print(f"Our config: {config}")
Our config: {'BATCH_SIZE': 64, 'NUM_EPOCHS': 10, 'LR': '3e-4'}
# Preprocessing applied to every LFW image: tensor conversion, padding by 2
# pixels, then per-channel normalization.
LFWPeople_tf = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Pad(2),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)
train_dataset = datasets.LFWPeople(
    root='./data', split="train", transform=LFWPeople_tf, download=True
)
test_dataset = datasets.LFWPeople(
    root='./data', split="test", transform=LFWPeople_tf, download=True
)
# Split the official test split in half: one half stays the test set, the
# other half becomes the validation set.
test_dataset, val_dataset = torch.utils.data.random_split(
    test_dataset, lengths=[0.5, 0.5]
)
Files already downloaded and verified Files already downloaded and verified
# Report how many samples each split contains (before the triplet cleanup below).
print(f"train set size: {len(train_dataset)}")
print(f"validation set size: {len(val_dataset)}")
print(f"test set size: {len(test_dataset)}")
train set size: 9525 validation set size: 1854 test set size: 1854
class TripletDataset:
    """
    Dataset class from which we sample random triplets.

    Samples whose label occurs only once in ``dataset`` cannot act as anchors
    (there is no second sample to use as the positive), so they are removed
    from the anchor/positive pool. Negatives are still drawn from the full,
    uncleaned dataset.
    """

    def __init__(self, dataset):
        """
        Dataset initializer.

        Args:
            dataset: indexable collection of ``(image, label)`` pairs that
                supports ``len``.
        """
        self.dataset = dataset
        self.arange = np.arange(len(self.dataset))
        self.labels = torch.Tensor([l for _, l in dataset])
        self.cleaned_dataset, self.cleaned_labels = self._clean_singular_anchors()
        self.arange_cleaned = np.arange(len(self.cleaned_dataset))

    def __len__(self):
        """ Returning number of anchors """
        return len(self.cleaned_dataset)

    def _clean_singular_anchors(self):
        """
        Drop every sample whose label occurs exactly once in the dataset.

        Returns:
            Tuple ``(cleaned_dataset, cleaned_labels)``: the list of retained
            ``(image, label)`` pairs and a tensor with their labels. Both are
            also stored on the instance.
        """
        # Count occurrences per label once instead of scanning all labels for
        # every sample.
        _, inverse, counts = np.unique(
            self.labels.numpy(), return_inverse=True, return_counts=True
        )
        keep_ids = np.flatnonzero(counts[inverse.reshape(-1)] > 1)
        self.cleaned_dataset = [self.dataset[int(idx)] for idx in keep_ids]
        self.cleaned_labels = torch.Tensor([l for _, l in self.cleaned_dataset])
        return self.cleaned_dataset, self.cleaned_labels

    def __getitem__(self, i):
        """
        Sampling a triplet for the dataset. Index i corresponds to anchor.

        Returns:
            ``((anchor_img, pos_img, neg_img), (anchor_lbl, pos_lbl, neg_lbl))``
            where the positive shares the anchor's label but is a different
            sample, and the negative has a different label.

        Raises:
            IndexError: if ``i`` is out of range or no positive/negative
                candidate exists for the anchor.
        """
        anchor_idx = int(i)
        anchor_img, anchor_lbl = self.cleaned_dataset[anchor_idx]

        # Positives: same label within the cleaned pool, excluding the anchor
        # itself. The anchor is masked out by its own index, because it is not
        # necessarily the first sample carrying that label.
        pos_mask = (self.cleaned_labels == anchor_lbl).numpy()
        pos_mask[anchor_idx] = False
        pos_ids = self.arange_cleaned[pos_mask]

        # Negatives: any sample of the full dataset with a different label.
        neg_ids = self.arange[(self.labels != anchor_lbl).numpy()]

        # Plain lists keep random.choice independent of numpy truthiness rules.
        pos_id = random.choice(pos_ids.tolist())
        neg_id = random.choice(neg_ids.tolist())

        pos_img, pos_lbl = self.cleaned_dataset[pos_id]
        neg_img, neg_lbl = self.dataset[neg_id]
        return (anchor_img, pos_img, neg_img), (anchor_lbl, pos_lbl, neg_lbl)
# Wrap each split so that indexing it yields (anchor, positive, negative) triplets.
tri_train_dataset, tri_val_dataset, tri_test_dataset = map(
    TripletDataset, (train_dataset, val_dataset, test_dataset)
)
There may be a more subtle way to handle anchors that have no positive face other than themselves, e.g. by still making use of them as negative samples, but we will try this approach to begin with.
# Report how many anchors remain in each split after single-sample identities were removed.
print(f"train set size after cleanup: {len(tri_train_dataset)}")
print(f"validation set size after cleanup: {len(tri_val_dataset)}")
print(f"test set size after cleanup: {len(tri_test_dataset)}")
train set size after cleanup: 6671 validation set size after cleanup: 1110 test set size after cleanup: 1065
def clip_img(img):
    """
    Clamp all values of ``img`` into the interval [0, 1].

    Args:
        img: array-like of numeric values (any shape, may be empty).

    Returns:
        Array of the same shape with values below 0 set to 0 and values
        above 1 set to 1.
    """
    # The previous min/max computations were unused and failed on empty input.
    return np.clip(img, 0, 1)
# Preview of ten validation triplets: one figure (row of ten axes) per role.
fig_anchors, anchors = plt.subplots(1, 10, figsize=(30, 3))
fig_pos, positives = plt.subplots(1, 10, figsize=(30, 3))
fig_neg, negatives = plt.subplots(1, 10, figsize=(30, 3))
for i in range(10):
    try:
        (anchor, positive, negative), _ = tri_val_dataset[i]
    except ValueError:
        # Best effort: leave this column empty if the triplet cannot be sampled.
        continue
    for axes_row, img in ((anchors, anchor), (positives, positive), (negatives, negative)):
        # Channel-first tensor -> channel-last array for imshow. The spatial
        # size is taken from the tensor itself instead of a hard-coded 254x254.
        axes_row[i].imshow(clip_img(img.numpy().transpose(1, 2, 0)))
        axes_row[i].axis("off")
fig_anchors.suptitle("Anchors")
fig_pos.suptitle("Positives")
fig_neg.suptitle("Negative")
# plt.tight_layout() only affects the current (last created) figure, so lay
# out every figure explicitly.
for fig in (fig_anchors, fig_pos, fig_neg):
    fig.tight_layout()
# Batch size comes from the YAML config; only the test loader keeps a fixed order.
batch_size = int(config["BATCH_SIZE"])
train_loader = torch.utils.data.DataLoader(
    tri_train_dataset, shuffle=True, batch_size=batch_size
)
valid_loader = torch.utils.data.DataLoader(
    tri_val_dataset, shuffle=True, batch_size=batch_size
)
test_loader = torch.utils.data.DataLoader(
    tri_test_dataset, shuffle=False, batch_size=batch_size
)
class NormLayer(nn.Module):
    """ Layer that computes embedding normalization """

    def __init__(self, l=2, eps=1e-12):
        """
        Layer initializer.

        Args:
            l: order of the norm, 1 or 2.
            eps: lower bound applied to the norm before dividing, so that an
                all-zero embedding maps to zeros instead of NaN.
        """
        assert l in [1, 2]
        super().__init__()
        self.l = l
        self.eps = eps

    def forward(self, x):
        """ Normalizing embeddings x along the last dimension. The shape of x is (B,D) """
        norm = torch.norm(x, p=self.l, dim=-1, keepdim=True)
        # Clamp to avoid 0/0 for zero vectors; non-zero rows are unaffected.
        return x / norm.clamp_min(self.eps)
class SiameseModel(nn.Module):
    """
    Implementation of a simple siamese model: a ResNet-18 feature extractor
    (without its final classification layer), a linear embedder and a
    normalization layer, shared by all inputs of a triplet.
    """

    def __init__(self, emb_dim=32, in_spatial=(32, 32), use_pretrained = True):
        """
        Module initializer.

        Args:
            emb_dim: dimensionality of the produced embedding.
            in_spatial: unused; kept so existing callers keep working.
            use_pretrained: load the ImageNet weights for the ResNet-18
                backbone when True, random initialization otherwise.
        """
        super().__init__()

        # convolutional feature extractor
        # `weights=` replaces the deprecated `pretrained=` flag; IMAGENET1K_V1
        # is what `pretrained=True` resolved to according to the torchvision
        # deprecation warning.
        weights = models.ResNet18_Weights.IMAGENET1K_V1 if use_pretrained else None
        resnet = models.resnet18(weights=weights)
        modules = list(resnet.children())[:-1]  # drop the final fc layer
        self.resnet = nn.Sequential(*modules)

        # fully connected embedder
        self.fc = nn.Linear(512, emb_dim)

        # auxiliary layers
        self.flatten = nn.Flatten()
        self.norm = NormLayer()

    def forward_one(self, x):
        """ Forwarding a batch of images through the model, returning normalized embeddings """
        x = self.resnet(x)
        x_flat = self.flatten(x)
        x_emb = self.fc(x_flat)
        x_emb_norm = self.norm(x_emb)
        return x_emb_norm

    def forward(self, anchor, positive, negative):
        """ Forwarding a triplet; returns the three embeddings in input order """
        anchor_emb = self.forward_one(anchor)
        positive_emb = self.forward_one(positive)
        negative_emb = self.forward_one(negative)
        return anchor_emb, positive_emb, negative_emb
# Prefer the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
# Randomly initialized backbone (no pretrained weights) moved to that device.
model = SiameseModel(use_pretrained=False).to(device)
/home/user/lschulze/anaconda3/envs/lab/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. warnings.warn( /home/user/lschulze/anaconda3/envs/lab/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=None`. warnings.warn(msg)
Triplet loss and Trainer shall initially remain unchanged
class TripletLoss(nn.Module):
    """ Hinge-style triplet loss on squared Euclidean distances """

    def __init__(self, margin=0.2, reduce="mean"):
        """ Store the margin and the batch reduction ("mean" or "sum") """
        assert reduce in ("mean", "sum")
        super().__init__()
        self.margin = margin
        self.reduce = reduce

    def forward(self, anchor, positive, negative):
        """ Per-sample max(d(a,p) - d(a,n) + margin, 0), reduced over the batch """
        # squared distances along the embedding dimension
        pos_dist = torch.sum((anchor - positive) ** 2, dim=-1)
        neg_dist = torch.sum((anchor - negative) ** 2, dim=-1)

        # hinge: only triplets violating the margin contribute
        violation = pos_dist - neg_dist + self.margin
        hinge = torch.maximum(violation, torch.zeros_like(violation))

        # batch reduction
        if self.reduce == "mean":
            return torch.mean(hinge)
        return torch.sum(hinge)
class SemiHardTripletLoss(nn.Module):
    """ Triplet loss whose negatives are mined among the anchors of the batch """

    # Sentinel distance for anchors without any admissible negative; it is
    # large enough for the hinge to evaluate to zero for such anchors.
    NO_NEGATIVE = 1e6

    def __init__(self, margin=0.2, reduce="mean"):
        """
        Module initializer.

        Args:
            margin: margin of the hinge.
            reduce: "mean" or "sum" over the batch.
        """
        assert reduce in ["mean", "sum"]
        super().__init__()
        self.margin = margin
        self.reduce = reduce

    def forward(self, anchor, positive, negative, labels):
        """
        Compute the TripletLoss using semi-hard negative mining strategy.
        This function is vectorized and does not contain explicit for loops.

        Args:
            anchor: anchor embeddings, shape (B, D).
            positive: positive embeddings, shape (B, D).
            negative: unused; negatives are mined from the other anchors.
            labels: anchor labels, shape (B,).

        Returns:
            Scalar loss. For each anchor the negative is the closest other
            anchor with a different label that is farther away than the
            positive; anchors without such a negative contribute zero.
        """
        # Squared distance anchor <-> positive
        d_ap = (anchor - positive).pow(2).sum(dim=-1)

        # Squared pairwise distances between anchors. They must be on the same
        # (squared) scale as d_ap; comparing squared d_ap against the
        # unsquared cdist output mixed two different metrics.
        pairwise = (anchor.unsqueeze(1) - anchor.unsqueeze(0)).pow(2).sum(dim=-1)

        # Admissible negatives: different label (this also excludes the
        # diagonal) and farther from the anchor than its positive.
        mask = (pairwise > d_ap.unsqueeze(1)) & (labels != labels.unsqueeze(1))
        d_an = pairwise.masked_fill(~mask, self.NO_NEGATIVE).min(dim=1).values

        # triplet loss function
        loss = d_ap - d_an + self.margin
        loss = torch.maximum(loss, torch.zeros_like(loss))

        # averaging or summing
        return torch.mean(loss) if self.reduce == "mean" else torch.sum(loss)
class Trainer:
    """
    Class for training and validating a siamese model
    """

    def __init__(self, model, criterion, train_loader, valid_loader, n_iters=1e4, save_freq = 500, margin = 0.2, save_name='siamese', valid_freq=250):
        """
        Trainer initializer.

        Args:
            model: module called as ``model(anchors, positives, negatives)``.
            criterion: loss called on the three embeddings returned by the model.
            train_loader / valid_loader: loaders yielding
                ``((anchors, positives, negatives), labels)`` batches.
            n_iters: total number of optimization steps.
            save_freq: a checkpoint is written every ``save_freq`` steps.
            margin: margin value forwarded to ``utils_v2.save_model``.
            save_name: stored on the instance; not passed to ``save_model``.
            valid_freq: validation runs every ``valid_freq`` steps.
        """
        self.model = model
        self.criterion = criterion
        self.train_loader = train_loader
        self.valid_loader = valid_loader

        self.n_iters = int(n_iters)
        self.optimizer = torch.optim.Adam(model.parameters(), lr=3e-4, weight_decay=1e-5)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        self.save_name = save_name
        # Use the caller's margin (it used to be overwritten with a fixed 0.2).
        self.margin = margin
        self.save_freq = save_freq
        self.valid_freq = valid_freq

        self.train_loss = []
        self.valid_loss = []

    @torch.no_grad()
    def valid_step(self, val_iters=100):
        """
        Run at most ``val_iters`` validation batches.

        Returns:
            List with the loss of every processed batch; the same values are
            appended to ``self.valid_loss``.
        """
        self.model.eval()
        cur_losses = []
        try:
            for i, ((anchors, positives, negatives), _) in enumerate(self.valid_loader):
                # Check the budget first so that exactly val_iters batches run.
                if i >= val_iters:
                    break

                # setting inputs to the trainer's device
                anchors = anchors.to(self.device)
                positives = positives.to(self.device)
                negatives = negatives.to(self.device)

                # forward pass and triplet loss
                anchor_emb, positive_emb, negative_emb = self.model(anchors, positives, negatives)
                loss = self.criterion(anchor_emb, positive_emb, negative_emb)
                cur_losses.append(loss.item())
        finally:
            # Always switch back to training mode, even if validation fails.
            self.model.train()

        self.valid_loss += cur_losses
        return cur_losses

    def fit(self):
        """ Train/Validation loop: ``n_iters`` steps with periodic validation and checkpoints """
        self.iter_ = 0

        with tqdm(total=self.n_iters, initial=0) as progress_bar:
            # The outer range only bounds the number of passes over the loader;
            # the loop ends as soon as n_iters steps were taken.
            for _ in range(self.n_iters):
                for (anchors, positives, negatives), _ in self.train_loader:
                    # setting inputs to the trainer's device
                    anchors = anchors.to(self.device)
                    positives = positives.to(self.device)
                    negatives = negatives.to(self.device)

                    # forward pass and triplet loss
                    anchor_emb, positive_emb, negative_emb = self.model(anchors, positives, negatives)
                    loss = self.criterion(anchor_emb, positive_emb, negative_emb)
                    self.train_loss.append(loss.item())

                    # optimization
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()

                    # updating progress bar (the bar previously never advanced)
                    progress_bar.set_description(f"Train Iter {self.iter_}: Loss={round(loss.item(),5)}")
                    progress_bar.update(1)

                    # doing some validation every once in a while
                    if self.iter_ % self.valid_freq == 0:
                        cur_losses = self.valid_step()
                        print(f"Valid loss @ iteration {self.iter_}: Loss={np.mean(cur_losses)}")

                    # saving model every n-th iter
                    if self.iter_ % self.save_freq == 0:
                        stats = {
                            "train_loss": self.train_loss,
                            "valid_loss": self.valid_loss
                        }
                        utils_v2.save_model(self.model, self.optimizer, self.iter_, stats, margin=self.margin)

                    self.iter_ = self.iter_ + 1
                    if self.iter_ >= self.n_iters:
                        break
                if self.iter_ >= self.n_iters:
                    break
# Plain triplet loss with an ImageNet-pretrained backbone.
criterion = TripletLoss(margin=0.2)
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
model = SiameseModel(use_pretrained=True).to(device)
trainer = Trainer(
    model=model,
    criterion=criterion,
    train_loader=train_loader,
    valid_loader=valid_loader,
    n_iters=10001,
    save_freq=500,
    save_name='resnet',
)
# train_loader_semihard = torch.utils.data.DataLoader(dataset=tri_train_dataset, batch_size=128, shuffle=True)
/home/user/rogf1/miniconda3/envs/lab2/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. warnings.warn( /home/user/rogf1/miniconda3/envs/lab2/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet18_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet18_Weights.DEFAULT` to get the most up-to-date weights. warnings.warn(msg)
# Release memory cached by PyTorch's CUDA allocator before the long run.
torch.cuda.empty_cache()
# Run the train/validation loop of the trainer created in the previous cell.
trainer.fit()
Train Iter 0: Loss=0.13277): 0%| | 0/10001 [00:00<?, ?it/s]
Valid loss @ iteration 0: Loss=0.08812464132077164
Train Iter 250: Loss=0.00418): 0%| | 0/10001 [01:18<?, ?it/s]
Valid loss @ iteration 250: Loss=0.03119450140123566
Train Iter 500: Loss=0.02005): 0%| | 0/10001 [02:36<?, ?it/s]
Valid loss @ iteration 500: Loss=0.023577456569506064
Train Iter 750: Loss=0.02279): 0%| | 0/10001 [03:55<?, ?it/s]
Valid loss @ iteration 750: Loss=0.029062614840869274
Train Iter 1000: Loss=0.01118): 0%| | 0/10001 [05:13<?, ?it/s]
Valid loss @ iteration 1000: Loss=0.026584343208620947
Train Iter 1250: Loss=0.01613): 0%| | 0/10001 [06:33<?, ?it/s]
Valid loss @ iteration 1250: Loss=0.024931369504580896
Train Iter 1500: Loss=0.02395): 0%| | 0/10001 [07:51<?, ?it/s]
Valid loss @ iteration 1500: Loss=0.024647042737342417
Train Iter 1750: Loss=0.00787): 0%| | 0/10001 [09:10<?, ?it/s]
Valid loss @ iteration 1750: Loss=0.017044874887991075
Train Iter 2000: Loss=0.00173): 0%| | 0/10001 [10:30<?, ?it/s]
Valid loss @ iteration 2000: Loss=0.019504211403222546
Train Iter 2250: Loss=0.00496): 0%| | 0/10001 [11:50<?, ?it/s]
Valid loss @ iteration 2250: Loss=0.03160018360035287
Train Iter 2500: Loss=0.01267): 0%| | 0/10001 [13:08<?, ?it/s]
Valid loss @ iteration 2500: Loss=0.02260981225926015
Train Iter 2750: Loss=0.01511): 0%| | 0/10001 [14:27<?, ?it/s]
Valid loss @ iteration 2750: Loss=0.0185028635767392
Train Iter 3000: Loss=0.00103): 0%| | 0/10001 [15:46<?, ?it/s]
Valid loss @ iteration 3000: Loss=0.0263065315472583
Train Iter 3250: Loss=0.01087): 0%| | 0/10001 [17:05<?, ?it/s]
Valid loss @ iteration 3250: Loss=0.017483670704273716
Train Iter 3500: Loss=0.00178): 0%| | 0/10001 [18:24<?, ?it/s]
Valid loss @ iteration 3500: Loss=0.019644443928781483
Train Iter 3750: Loss=0.00852): 0%| | 0/10001 [19:46<?, ?it/s]
Valid loss @ iteration 3750: Loss=0.015427874233056273
Train Iter 4000: Loss=0.00105): 0%| | 0/10001 [21:04<?, ?it/s]
Valid loss @ iteration 4000: Loss=0.01955404828509523
Train Iter 4250: Loss=0.01419): 0%| | 0/10001 [22:23<?, ?it/s]
Valid loss @ iteration 4250: Loss=0.023433545449127752
Train Iter 4500: Loss=0.00821): 0%| | 0/10001 [23:43<?, ?it/s]
Valid loss @ iteration 4500: Loss=0.019490658008079562
Train Iter 4750: Loss=0.0): 0%| | 0/10001 [25:02<?, ?it/s]
Valid loss @ iteration 4750: Loss=0.01998594762214149
Train Iter 5000: Loss=0.00315): 0%| | 0/10001 [26:20<?, ?it/s]
Valid loss @ iteration 5000: Loss=0.01742938917595893
Train Iter 5250: Loss=0.00487): 0%| | 0/10001 [27:39<?, ?it/s]
Valid loss @ iteration 5250: Loss=0.02240612130198214
Train Iter 5500: Loss=0.00385): 0%| | 0/10001 [28:57<?, ?it/s]
Valid loss @ iteration 5500: Loss=0.019146740436553955
Train Iter 5750: Loss=0.01374): 0%| | 0/10001 [30:16<?, ?it/s]
Valid loss @ iteration 5750: Loss=0.02033214871254232
Train Iter 6000: Loss=0.00804): 0%| | 0/10001 [31:36<?, ?it/s]
Valid loss @ iteration 6000: Loss=0.02458224016138249
Train Iter 6250: Loss=0.00624): 0%| | 0/10001 [33:00<?, ?it/s]
Valid loss @ iteration 6250: Loss=0.021047418807736702
Train Iter 6500: Loss=0.01828): 0%| | 0/10001 [34:19<?, ?it/s]
Valid loss @ iteration 6500: Loss=0.02278200112697151
Train Iter 6750: Loss=0.0): 0%| | 0/10001 [35:45<?, ?it/s]
Valid loss @ iteration 6750: Loss=0.01906205338632895
Train Iter 7000: Loss=0.00397): 0%| | 0/10001 [37:05<?, ?it/s]
Valid loss @ iteration 7000: Loss=0.02190377889201045
Train Iter 7250: Loss=0.00729): 0%| | 0/10001 [38:36<?, ?it/s]
Valid loss @ iteration 7250: Loss=0.022410922373334568
Train Iter 7500: Loss=0.00816): 0%| | 0/10001 [40:12<?, ?it/s]
Valid loss @ iteration 7500: Loss=0.02052693610959169
Train Iter 7750: Loss=0.0): 0%| | 0/10001 [41:48<?, ?it/s]
Valid loss @ iteration 7750: Loss=0.021680258240343794
Train Iter 8000: Loss=0.00129): 0%| | 0/10001 [43:17<?, ?it/s]
Valid loss @ iteration 8000: Loss=0.016153043588726886
Train Iter 8250: Loss=0.0021): 0%| | 0/10001 [44:36<?, ?it/s]
Valid loss @ iteration 8250: Loss=0.01866393106886082
Train Iter 8500: Loss=0.00015): 0%| | 0/10001 [45:55<?, ?it/s]
Valid loss @ iteration 8500: Loss=0.01908704809223612
Train Iter 8750: Loss=0.00364): 0%| | 0/10001 [47:13<?, ?it/s]
Valid loss @ iteration 8750: Loss=0.01791204716492858
Train Iter 9000: Loss=0.0075): 0%| | 0/10001 [48:32<?, ?it/s]
Valid loss @ iteration 9000: Loss=0.01787768429817839
Train Iter 9250: Loss=0.0): 0%| | 0/10001 [49:51<?, ?it/s]
Valid loss @ iteration 9250: Loss=0.023911982525735058
Train Iter 9500: Loss=0.0034): 0%| | 0/10001 [51:12<?, ?it/s]
Valid loss @ iteration 9500: Loss=0.018979898201198213
Train Iter 9750: Loss=0.01038): 0%| | 0/10001 [52:31<?, ?it/s]
Valid loss @ iteration 9750: Loss=0.027951414024250373
Train Iter 10000: Loss=0.00536): 0%| | 0/10001 [53:52<?, ?it/s]
Valid loss @ iteration 10000: Loss=0.02402143661553661
@torch.no_grad()
def test_model(model, test_loader, criterion, device = "cuda"):
    """
    Evaluate ``model`` on every batch of ``test_loader``.

    The model is switched to eval mode, each triplet batch is moved to
    ``device`` and scored with ``criterion``. Returns the mean of the
    per-batch loss values.
    """
    model.eval()
    batch_losses = []
    for (anchors, positives, negatives), _ in test_loader:
        # move the triplet to the evaluation device
        anchors, positives, negatives = (
            anchors.to(device),
            positives.to(device),
            negatives.to(device),
        )

        # embed the triplet and record the scalar loss of this batch
        anchor_emb, positive_emb, negative_emb = model(anchors, positives, negatives)
        batch_losses.append(criterion(anchor_emb, positive_emb, negative_emb).item())
    return np.mean(batch_losses)
# Plot the recorded loss curves and report the mean triplet loss on the test split.
# NOTE(review): the same [:9500] cut is applied to both lists, although train_loss
# gets one entry per training step and valid_loss one entry per validation batch
# -- confirm that this is intended.
utils_v2.visualize_progress(trainer.train_loss[:9500], trainer.valid_loss[:9500], start=0)
print(f"Loss for the test set: {test_model(trainer.model, test_loader, criterion, device)}")
Loss for the test set: 0.04594797717736048
(chosen later for semi-hard training)
# Load the checkpoint written at iteration 9500, plot its stored loss curves and
# evaluate it on the test split.
# NOTE(review): load_model receives trainer.model, so it presumably restores the
# weights into that same object -- verify against template.utils_v2.
model, optimizer, epoch, stats = utils_v2.load_model(trainer.model, trainer.optimizer, savepath="checkpoints/checkpoint_epoch_9500_margin_0.2.pth")
utils_v2.visualize_progress(stats["train_loss"], stats["valid_loss"], start=0)
print(f"Loss for the test set: {test_model(model, test_loader, criterion, device)}")
Loss for the test set: 0.017512838678764336
In the training progress plot, we can see that the loss is more often than not equal to 0 (that is why the blue loss curve drops all the way down). This is a good sign: a loss of 0 means that, for every triplet in the batch, the negative is farther from the anchor than the positive by at least the margin, i.e. the faces are ordered correctly. While loss plots are not a very solid metric, it is interesting to see that the loss reaches 0 faster than for the model without pretrained weights.
# Have to redefine dataloaders because if the batch size is too small, there might not be enough negatives in a batch for the semi-hard condition to be satisfied.
train_loader = torch.utils.data.DataLoader(dataset=tri_train_dataset, batch_size=128, shuffle=True)
valid_loader = torch.utils.data.DataLoader(dataset=tri_val_dataset, batch_size=128, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=tri_test_dataset, batch_size=128, shuffle=False)
torch.cuda.empty_cache()
# Same device selection as in the other cells instead of a hard-coded "cuda",
# so the cell also runs on machines without a GPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = SiameseModel(use_pretrained = True) # w/pretrained was better
model = model.to(device)
/home/user/rogf1/miniconda3/envs/lab2/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. warnings.warn( /home/user/rogf1/miniconda3/envs/lab2/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet18_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet18_Weights.DEFAULT` to get the most up-to-date weights. warnings.warn(msg)
# NOTE(review): the trailing comment says the criterion changed, but this is still
# the plain TripletLoss; SemiHardTrainer is not defined in the visible part of the
# file -- confirm which loss it actually applies.
criterion = TripletLoss(margin=0.2) # changed criterion
trainer_shard = SemiHardTrainer(model=model, criterion=criterion, train_loader=train_loader, valid_loader=valid_loader, n_iters=4001, save_freq = 500, save_name = 'semihard_6k') #only 4k iters, because we load a model trained on 6k iters
# trainer_shard.fit()
Train Iter 0: Loss=0.13788): 0%| | 0/10001 [00:01<?, ?it/s]
Valid loss @ iteration 0: Loss=0.09063652157783508
Train Iter 250: Loss=0.00511): 0%| | 0/10001 [03:52<?, ?it/s]
Valid loss @ iteration 250: Loss=0.02135220542550087
Train Iter 500: Loss=0.01076): 0%| | 0/10001 [07:37<?, ?it/s]
Valid loss @ iteration 500: Loss=0.022602517985635333
Train Iter 750: Loss=0.00991): 0%| | 0/10001 [10:24<?, ?it/s]
Valid loss @ iteration 750: Loss=0.02349506618662013
Train Iter 1000: Loss=0.00489): 0%| | 0/10001 [13:01<?, ?it/s]
Valid loss @ iteration 1000: Loss=0.020588884647521708
Train Iter 1250: Loss=0.00647): 0%| | 0/10001 [15:34<?, ?it/s]
Valid loss @ iteration 1250: Loss=0.02026232647606068
Train Iter 1500: Loss=0.00661): 0%| | 0/10001 [18:08<?, ?it/s]
Valid loss @ iteration 1500: Loss=0.016210711819844112
Train Iter 1750: Loss=0.00351): 0%| | 0/10001 [20:40<?, ?it/s]
Valid loss @ iteration 1750: Loss=0.023578124172571633
Train Iter 2000: Loss=0.00669): 0%| | 0/10001 [23:17<?, ?it/s]
Valid loss @ iteration 2000: Loss=0.01844892981979582
Train Iter 2250: Loss=0.00445): 0%| | 0/10001 [25:59<?, ?it/s]
Valid loss @ iteration 2250: Loss=0.01874490516881148
Train Iter 2500: Loss=0.0076): 0%| | 0/10001 [28:29<?, ?it/s]
Valid loss @ iteration 2500: Loss=0.0183661917431487
Train Iter 2750: Loss=0.00522): 0%| | 0/10001 [31:00<?, ?it/s]
Valid loss @ iteration 2750: Loss=0.01731345026443402
Train Iter 3000: Loss=0.00585): 0%| | 0/10001 [33:29<?, ?it/s]
Valid loss @ iteration 3000: Loss=0.015684681116706796
Train Iter 3250: Loss=0.01159): 0%| | 0/10001 [35:59<?, ?it/s]
Valid loss @ iteration 3250: Loss=0.02159149820605914
Train Iter 3500: Loss=0.00896): 0%| | 0/10001 [38:28<?, ?it/s]
Valid loss @ iteration 3500: Loss=0.01431333382303516
Train Iter 3750: Loss=0.0108): 0%| | 0/10001 [40:59<?, ?it/s]
Valid loss @ iteration 3750: Loss=0.016293713202079136
Train Iter 4000: Loss=0.00646): 0%| | 0/10001 [43:28<?, ?it/s]
Valid loss @ iteration 4000: Loss=0.01614203490316868
Train Iter 4250: Loss=0.00747): 0%| | 0/10001 [45:59<?, ?it/s]
Valid loss @ iteration 4250: Loss=0.013425455066478915
Train Iter 4500: Loss=0.0021): 0%| | 0/10001 [48:29<?, ?it/s]
Valid loss @ iteration 4500: Loss=0.010784317351256808
Train Iter 4750: Loss=0.0008): 0%| | 0/10001 [50:59<?, ?it/s]
Valid loss @ iteration 4750: Loss=0.011344489253436526
Train Iter 5000: Loss=0.02006): 0%| | 0/10001 [53:28<?, ?it/s]
Valid loss @ iteration 5000: Loss=0.03348888705174128
Train Iter 5250: Loss=0.00845): 0%| | 0/10001 [55:58<?, ?it/s]
Valid loss @ iteration 5250: Loss=0.03173380365802182
Train Iter 5500: Loss=0.0039): 0%| | 0/10001 [58:27<?, ?it/s]
Valid loss @ iteration 5500: Loss=0.020204066919783752
Train Iter 5750: Loss=0.0009): 0%| | 0/10001 [1:00:58<?, ?it/s]
Valid loss @ iteration 5750: Loss=0.014078977698874142
Train Iter 6000: Loss=0.00602): 0%| | 0/10001 [1:03:27<?, ?it/s]
Valid loss @ iteration 6000: Loss=0.013521834535317289
Train Iter 6076: Loss=0.00078): 0%| | 0/10001 [1:04:16<?, ?it/s]
# Resuming training from the iteration-6000 checkpoint after the machine crashed.
model, optimizer, epoch, stats = utils_v2.load_model(trainer_shard.model, trainer_shard.optimizer, savepath="checkpoints/checkpoint_semihard_epoch_6000_margin_0.2.pth")
# Restore the loss history and the loaded model/optimizer on the trainer, then
# continue for the trainer's n_iters steps.
trainer_shard.train_loss = stats["train_loss"]
trainer_shard.valid_loss = stats["valid_loss"]
trainer_shard.model = model
trainer_shard.optimizer = optimizer
trainer_shard.fit()
Train Iter 0: Loss=0.00109): 0%| | 0/4001 [00:01<?, ?it/s]
Valid loss @ iteration 0: Loss=0.016303581444339618
Train Iter 250: Loss=0.00058): 0%| | 0/4001 [02:30<?, ?it/s]
Valid loss @ iteration 250: Loss=0.012939626118168235
Train Iter 500: Loss=0.00034): 0%| | 0/4001 [04:58<?, ?it/s]
Valid loss @ iteration 500: Loss=0.014275793917477131
Train Iter 750: Loss=0.0046): 0%| | 0/4001 [07:28<?, ?it/s]
Valid loss @ iteration 750: Loss=0.014303350096775426
Train Iter 1000: Loss=0.00268): 0%| | 0/4001 [09:57<?, ?it/s]
Valid loss @ iteration 1000: Loss=0.024401098696721926
Train Iter 1250: Loss=0.00302): 0%| | 0/4001 [12:27<?, ?it/s]
Valid loss @ iteration 1250: Loss=0.01377430252937807
Train Iter 1500: Loss=0.00507): 0%| | 0/4001 [14:56<?, ?it/s]
Valid loss @ iteration 1500: Loss=0.01369444839656353
Train Iter 1750: Loss=0.0054): 0%| | 0/4001 [17:26<?, ?it/s]
Valid loss @ iteration 1750: Loss=0.01789008325431496
Train Iter 2000: Loss=0.00522): 0%| | 0/4001 [19:55<?, ?it/s]
Valid loss @ iteration 2000: Loss=0.024799489312701754
Train Iter 2250: Loss=0.00175): 0%| | 0/4001 [22:25<?, ?it/s]
Valid loss @ iteration 2250: Loss=0.023583468256725207
Train Iter 2500: Loss=0.00077): 0%| | 0/4001 [24:54<?, ?it/s]
Valid loss @ iteration 2500: Loss=0.021194605363739863
Train Iter 2750: Loss=0.00746): 0%| | 0/4001 [27:24<?, ?it/s]
Valid loss @ iteration 2750: Loss=0.016660725697875023
Train Iter 3000: Loss=0.00013): 0%| | 0/4001 [29:53<?, ?it/s]
Valid loss @ iteration 3000: Loss=0.013356634066440165
Train Iter 3250: Loss=0.0): 0%| | 0/4001 [32:22<?, ?it/s]
Valid loss @ iteration 3250: Loss=0.01599674408013622
Train Iter 3500: Loss=0.00107): 0%| | 0/4001 [34:51<?, ?it/s]
Valid loss @ iteration 3500: Loss=0.016857331411706075
Train Iter 3750: Loss=0.00462): 0%| | 0/4001 [37:20<?, ?it/s]
Valid loss @ iteration 3750: Loss=0.016888266946706507
Train Iter 4000: Loss=0.00092): 0%| | 0/4001 [39:52<?, ?it/s]
Valid loss @ iteration 4000: Loss=0.0194724609868394
# Plot the complete loss history of the semi-hard run and report the test loss
# of its final model.
utils_v2.visualize_progress(trainer_shard.train_loss, trainer_shard.valid_loss, start=0)
print(f"Loss for the test set: {test_model(trainer_shard.model, test_loader, criterion, device)}")
Loss for the test set: 0.02015742550914486
We can see that the loss is at its lowest around iteration 4500. Let's load and test the model that was saved at iteration 4500.
# Load the semi-hard checkpoint written at iteration 4500 and evaluate it on the test split.
# NOTE(review): the checkpoint is loaded through trainer_shard.model, which
# presumably overwrites that model's weights in place -- verify.
model_4500, optimizer_4500, epoch_4500, stats_4500 = utils_v2.load_model(trainer_shard.model, trainer_shard.optimizer, savepath="checkpoints/checkpoint_semihard_epoch_4500_margin_0.2.pth")
print(f"Loss for the test set: {test_model(model_4500, test_loader, criterion, device)}")
Loss for the test set: 0.01488266592948801
The test loss is even lower. It is possible that the model started overfitting after iteration 4500, which would explain the increase in validation loss.
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
def display_projections(points, labels, ax=None, legend=None):
    """
    Displaying low-dimensional data projections.

    Args:
        points: array-like of shape (N, 2+); columns 0 and 1 are plotted.
        labels: sequence of N class labels.
        ax: axes to draw on; a new 12x6 figure is created when None.
        legend: legend entries, one per unique label in sorted order;
            defaults to "Class <label>".
    """
    COLORS = ['grey', 'b', 'g', 'y', 'purple', 'orange', 'k', 'brown', 'r',
              'c', "gold", "fuchsia", "lime", "darkred", "tomato", "navy"]
    points = np.asarray(points)
    labels = np.asarray(labels)
    classes = np.unique(labels)
    legend = [f"Class {l}" for l in classes] if legend is None else legend
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(12, 6))

    for i, l in enumerate(classes):
        mask = labels == l
        # Cycle through the palette so more classes than colours do not raise
        # an IndexError.
        ax.scatter(points[mask, 0], points[mask, 1], label=legend[i], c=COLORS[i % len(COLORS)])
    ax.legend(loc="best")
# model = trainer.model
# Put the current model into eval mode and reload the complete (unsplit) test
# split together with a tensor of its labels.
# NOTE(review): `device` is assigned twice in this cell; only the second
# assignment is still in effect afterwards.
device = trainer.device
model = model.eval()
test_dataset = datasets.LFWPeople(root='./data', transform=LFWPeople_tf, split="test",download=True)
labels = torch.Tensor([l for _,l in test_dataset])
device = 'cuda' if torch.cuda.is_available() else 'cpu'
Files already downloaded and verified
def get_same_labels(k=25):
    """
    Collect the labels that occur more than ``k`` times in the test dataset.

    Reads the module-level ``labels`` tensor. Prints the number of distinct
    selected labels.

    Args:
        k: minimum number of occurrences (exclusive) for a label to be kept.

    Returns:
        List of int labels in ascending order, each selected label repeated
        once per occurrence.
    """
    # Count every label in one pass. This also covers the largest label, which
    # the previous `range(max_label)` scan skipped, and avoids loading images
    # and computing embeddings that were never used.
    unique_labels, counts = torch.unique(labels, return_counts=True)
    same_labels = []
    for label, count in zip(unique_labels.tolist(), counts.tolist()):
        if count > k:
            same_labels += [int(label)] * count
    print(np.unique(same_labels).__len__())
    return same_labels
# Labels of the frequently occurring identities; used below to decide which classes get their own colour.
same_labels = get_same_labels()
11
def display_projections_images(points, labels, dataset, ax=None, legend=None):
    """
    Displaying low-dimensional data projections using images instead of points.

    Samples with label 0 (the "undefined" cluster) are not drawn.

    Args:
        points: array-like of shape (N, 2+); columns 0 and 1 are used.
        labels: sequence of N class labels.
        dataset: indexable collection whose item ``i`` has the image tensor
            of sample ``i`` at position 0.
        ax: axes to draw on; a new 36x24 figure is created when None.
        legend: legend entries, one per unique label in sorted order;
            defaults to "Class <label>".
    """
    from matplotlib.offsetbox import OffsetImage, AnnotationBbox

    COLORS = ['r', 'b', 'g', 'y', 'purple', 'orange', 'k', 'brown', 'grey',
              'c', "gold", "fuchsia", "lime", "darkred", "tomato", "navy"]
    points = np.asarray(points)
    labels = np.asarray(labels)
    unique_labels = np.unique(labels)
    legend = [f"Class {l}" for l in unique_labels] if legend is None else legend
    # Honour a caller-supplied axes object; it used to be overwritten.
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(36, 24))

    # One colour per class, cycling through the palette if there are more
    # classes than colours.
    color_of = {l: COLORS[i % len(COLORS)] for i, l in enumerate(unique_labels.tolist())}

    for i, l in enumerate(unique_labels):
        # Skip by label value so both loops agree on what is hidden.
        if l == 0:
            continue
        mask = labels == l
        ax.scatter(points[mask, 0], points[mask, 1], label=legend[i], color=color_of[l.item()])

    for i, point in enumerate(points):
        if labels[i] == 0:
            continue
        xy = [point[0], point[1]]
        # Thumbnail of the sample: resize to 32x32, channel-first -> channel-last.
        resized_img = transforms.functional.resize(dataset[i][0], (32, 32))
        arr_img = clip_img(resized_img.numpy().transpose(1, 2, 0))

        imagebox = OffsetImage(arr_img, zoom=1)
        imagebox.image.axes = ax

        ab = AnnotationBbox(imagebox, xy,
                            xybox=(0, 0),
                            xycoords='data',
                            boxcoords="offset points",
                            pad=0.1,
                            bboxprops=dict(edgecolor=color_of[labels[i].item()], lw=2)
                            )
        ax.add_artist(ab)
    ax.legend(loc="best")
    plt.show()
# Same device selection as elsewhere in the file instead of a hard-coded "cuda".
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# NOTE(review): this freshly built model is replaced by the result of
# load_model below, which is given trainer.model -- confirm it is still needed.
model = SiameseModel(use_pretrained = True) # w/pretrained was better
# model = model.to(device)
# trainer.model = model
model, _, _, _ = utils_v2.load_model(trainer.model, trainer.optimizer, savepath="/home/user/rogf1/CudaVisionWS23/Assignment7/checkpoints/checkpoint_epoch_9500_margin_0.2.pth")
model = model.to(device)
model = model.eval()

# Embed the whole test split; keep flattened raw images next to the embeddings
# so both can be projected and compared.
visualization_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size)
embs = list()
labels = list()
imgs_flat = list()
with torch.no_grad():
    for data, label in visualization_loader:
        data = data.to(device)
        data_embedding = model.forward_one(data)
        labels.append(label)
        embs.append(data_embedding.cpu().flatten(1))
        imgs_flat.append(data.cpu().flatten(1))
labels = np.concatenate(labels)
embs = np.concatenate(embs)
imgs_flat = np.concatenate(imgs_flat)

# assert 0 not in same_labels, "0 in same_labels change the label of the undefined cluster"
# Labels outside the frequent identities collapse into class 0; a set makes
# each membership test O(1) instead of scanning the same_labels list.
kept_labels = set(same_labels)
filtered_labels = [int(lbl) if lbl in kept_labels else 0 for lbl in labels]

pca_imgs = PCA(n_components=2).fit_transform(imgs_flat)
pca_embs = PCA(n_components=2).fit_transform(embs)
N = 5000
# 'seaborn' was renamed to 'seaborn-v0_8' in Matplotlib 3.6 (see the deprecation
# warning this cell emitted); fall back to the old name on older versions.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
fig,ax = plt.subplots(1,2,figsize=(26,8))
# Left: projection of the raw pixels, right: projection of the learned embeddings.
display_projections(pca_imgs[:N], filtered_labels[:N], ax=ax[0])
ax[0].set_title("PCA Proj. of Images")
display_projections(pca_embs[:N], filtered_labels[:N], ax=ax[1])
ax[1].set_title("PCA Proj. of Embeddings")
plt.show()
/tmp/ipykernel_2473958/734107461.py:2: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead.
plt.style.use('seaborn')
# t-SNE projection of the first N raw images and of their embeddings, shown side by side.
N = 2000
tsne_imgs = TSNE(n_components=2).fit_transform(imgs_flat[:N])
tsne_embs = TSNE(n_components=2).fit_transform(embs[:N])
fig,ax = plt.subplots(1,2,figsize=(26,8))
display_projections(tsne_imgs[:N], filtered_labels[:N], ax=ax[0])
ax[0].set_title("T-SNE Proj. of Images")
display_projections(tsne_embs[:N], filtered_labels[:N], ax=ax[1])
ax[1].set_title("T-SNE Proj. of Embeddings")
plt.show()
The classes are visibly separated; this is clearly better than the embeddings shown below for the non-pretrained model.
# Same device selection as elsewhere in the file instead of a hard-coded "cuda".
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = SiameseModel(use_pretrained = False) # w/pretrained was better
model = model.to(device)
trainer.model = model
model, _, _, _ = utils_v2.load_model(trainer.model, trainer.optimizer, savepath="/home/user/rogf1/CudaVisionWS23/Assignment7/checkpoints/checkpoint_rand_weights_epoch_9500_margin_0.2.pth")
# model = trainer.model
model = model.eval()

# Embed the whole test split; keep flattened raw images next to the embeddings
# so both can be projected and compared.
visualization_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size)
embs = list()
labels = list()
imgs_flat = list()
with torch.no_grad():
    for data, label in visualization_loader:
        data = data.to(device)
        data_embedding = model.forward_one(data)
        labels.append(label)
        embs.append(data_embedding.cpu().flatten(1))
        imgs_flat.append(data.cpu().flatten(1))
labels = np.concatenate(labels)
embs = np.concatenate(embs)
imgs_flat = np.concatenate(imgs_flat)

pca_imgs = PCA(n_components=2).fit_transform(imgs_flat)
pca_embs = PCA(n_components=2).fit_transform(embs)

# Class 0 is reserved for "all other identities", so it must not be a real label.
assert 0 not in same_labels, "0 in same_labels change the label of the undefined cluster"
# A set makes each membership test O(1) instead of scanning the same_labels list.
kept_labels = set(same_labels)
filtered_labels = [int(lbl) if lbl in kept_labels else 0 for lbl in labels]
N = 5000
# 'seaborn' was renamed to 'seaborn-v0_8' in Matplotlib 3.6 (see the deprecation
# warning this cell emitted); fall back to the old name on older versions.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
fig,ax = plt.subplots(1,2,figsize=(26,8))
# Left: projection of the raw pixels, right: projection of the learned embeddings.
display_projections(pca_imgs[:N], filtered_labels[:N], ax=ax[0])
ax[0].set_title("PCA Proj. of Images")
display_projections(pca_embs[:N], filtered_labels[:N], ax=ax[1])
ax[1].set_title("PCA Proj. of Embeddings")
plt.show()
/tmp/ipykernel_2348380/4219804463.py:9: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead.
plt.style.use('seaborn')
# t-SNE projection of the first N raw images and of their embeddings, shown side by side.
N = 2000
tsne_imgs = TSNE(n_components=2).fit_transform(imgs_flat[:N])
tsne_embs = TSNE(n_components=2).fit_transform(embs[:N])
fig,ax = plt.subplots(1,2,figsize=(26,8))
display_projections(tsne_imgs[:N], filtered_labels[:N], ax=ax[0])
ax[0].set_title("T-SNE Proj. of Images")
display_projections(tsne_embs[:N], filtered_labels[:N], ax=ax[1])
ax[1].set_title("T-SNE Proj. of Embeddings")
plt.show()
It can be seen that the t-SNE projections are generally more clearly separated than the PCA projections for all the cases in this assignment.
class SiameseModel2(nn.Module):
    """
    Siamese embedding network built on a ResNet-18 backbone.

    The last child module of the ResNet (its classification layer) is dropped.
    A linear layer maps the remaining 512 features to ``emb_dim`` and the
    result is passed through ``NormLayer`` (defined elsewhere in this file).
    """

    def __init__(self, emb_dim=32, in_spatial=(32, 32), use_pretrained=True):
        """
        Module initializer.

        Args:
            emb_dim: Size of the embedding produced by the model.
            in_spatial: Not used by this model; kept so the constructor
                signature stays compatible with existing callers.
            use_pretrained: If True, the backbone starts from the ImageNet
                weights ``ResNet18_Weights.IMAGENET1K_V1``; otherwise it is
                randomly initialised.
        """
        super().__init__()

        # convolutional feature extractor
        # ``pretrained=`` is deprecated since torchvision 0.13; ``weights=``
        # with IMAGENET1K_V1 is the documented equivalent of pretrained=True.
        weights = models.ResNet18_Weights.IMAGENET1K_V1 if use_pretrained else None
        resnet = models.resnet18(weights=weights)
        modules = list(resnet.children())[:-1]
        self.resnet = nn.Sequential(*modules)

        # fully connected embedder
        self.fc = nn.Linear(512, emb_dim)

        # auxiliary layers
        self.flatten = nn.Flatten()
        self.norm = NormLayer()

    def forward_one(self, x):
        """Embed a single batch of images and return the output of ``NormLayer``."""
        x = self.resnet(x)
        x_flat = self.flatten(x)
        x_emb = self.fc(x_flat)
        x_emb_norm = self.norm(x_emb)
        return x_emb_norm

    def forward(self, anchor, positive):
        """
        Embed an (anchor, positive) pair with shared weights.

        Negatives are not forwarded here; the loss used with this model mines
        them from the other samples of the batch.

        Returns:
            Tuple ``(anchor_emb, positive_emb)``.
        """
        anchor_emb = self.forward_one(anchor)
        positive_emb = self.forward_one(positive)
        return anchor_emb, positive_emb
# Loaders over the triplet datasets, 128 triplets per batch. Training and
# validation batches are reshuffled; only the test loader keeps a fixed order.
train_loader = torch.utils.data.DataLoader(
    tri_train_dataset,
    batch_size=128,
    shuffle=True,
)
valid_loader = torch.utils.data.DataLoader(
    tri_val_dataset,
    batch_size=128,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    tri_test_dataset,
    batch_size=128,
    shuffle=False,
)
class SemiHardTripletLoss(nn.Module):
    """
    Triplet loss with in-batch semi-hard negative mining.

    For every anchor, the positives of the *other* samples in the batch that
    carry a different label are used as negative candidates.
    """

    # Distance assigned to an anchor for which no candidate negative qualifies.
    # It is large enough for the hinge to clamp that anchor's loss to zero.
    _NO_NEGATIVE = 1e6

    def __init__(self, margin=0.2, reduce="mean"):
        """
        Module initializer.

        Args:
            margin: Margin of the hinge ``d_ap - d_an + margin``.
            reduce: ``"mean"`` or ``"sum"``; how per-anchor losses are combined.
        """
        assert reduce in ["mean", "sum"]
        super().__init__()
        self.margin = margin
        self.reduce = reduce

    def forward(self, anchor, positive, labels):
        """
        Compute the triplet loss using a semi-hard negative mining strategy.

        The computation is vectorized and contains no explicit loops. For each
        anchor the closest negative that is farther away than its positive is
        selected. An anchor without such a negative contributes zero loss.

        Args:
            anchor: Anchor embeddings, expected shape ``(B, D)``.
            positive: Positive embeddings, expected shape ``(B, D)``; row ``i``
                is assumed to share its label with anchor ``i``.
            labels: Label of each anchor, expected shape ``(B,)``.

        Returns:
            Scalar tensor: mean or sum of the per-anchor losses.
        """
        # Squared Euclidean anchor-positive distance, shape (B,).
        d_ap = (anchor - positive).pow(2).sum(dim=-1)

        # Squared distance between anchor i and positive j, shape (B, B).
        # This must be in the same units as d_ap: comparing a squared distance
        # against a plain Euclidean one would pick the wrong negatives and
        # distort the margin.
        pairwise_dist = (anchor.unsqueeze(1) - positive.unsqueeze(0)).pow(2).sum(dim=-1)

        # Candidate j is a valid negative for anchor i when it has a different
        # label and lies farther away than the anchor's own positive. The
        # diagonal is excluded automatically since labels[i] == labels[i].
        different_label = labels.unsqueeze(1) != labels.unsqueeze(0)
        mask = different_label & (pairwise_dist > d_ap.unsqueeze(1))

        filler = torch.full_like(pairwise_dist, self._NO_NEGATIVE)
        d_an, _ = torch.where(mask, pairwise_dist, filler).min(dim=1)

        loss = torch.clamp(d_ap - d_an + self.margin, min=0)
        loss = torch.mean(loss) if self.reduce == "mean" else torch.sum(loss)
        return loss
class Trainer2:
    """
    Class for training and validating a siamese model with in-batch mining.

    Only anchors and positives are forwarded through the model. The negatives
    delivered by the loaders are ignored; the criterion receives the anchor
    labels instead.
    """

    def __init__(self, model, criterion, train_loader, valid_loader, n_iters=1e4, save_freq=500, margin=0.2, save_name='siamese'):
        """
        Trainer initializer.

        Args:
            model: Module called as ``model(anchors, positives)`` that returns
                the two embeddings.
            criterion: Callable ``criterion(anchor_emb, positive_emb, labels)``
                returning a scalar loss tensor.
            train_loader: Iterable yielding
                ``((anchors, positives, negatives), (labels, ..., ...))``.
            valid_loader: Iterable with the same structure as ``train_loader``.
            n_iters: Total number of optimisation steps (cast to ``int``).
            save_freq: A checkpoint is written every ``save_freq`` iterations.
            margin: Margin value handed to ``utils_v2.save_model``.
            save_name: Stored on the instance; not used by the trainer itself.
        """
        self.model = model
        self.criterion = criterion
        self.train_loader = train_loader
        self.valid_loader = valid_loader

        self.n_iters = int(n_iters)
        self.optimizer = torch.optim.Adam(model.parameters(), lr=3e-4, weight_decay=1e-5)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        self.save_name = save_name
        # Honour the caller's margin instead of a hard-coded 0.2.
        self.margin = margin
        self.save_freq = save_freq

        self.train_loss = []
        self.valid_loss = []

    @torch.no_grad()
    def valid_step(self, val_iters=100):
        """
        Evaluate on at most ``val_iters`` validation batches.

        The model is put in eval mode for the evaluation and back in train
        mode afterwards.

        Returns:
            List with the loss of every evaluated batch. The same values are
            also appended to ``self.valid_loss``.
        """
        self.model.eval()
        cur_losses = []
        for i, ((anchors, positives, _), (labels, _, _)) in enumerate(self.valid_loader):
            # Stop before processing batch number ``val_iters`` so that exactly
            # ``val_iters`` batches are evaluated.
            if i >= val_iters:
                break

            # setting inputs to the trainer's device
            anchors = anchors.to(self.device)
            positives = positives.to(self.device)
            labels = labels.to(self.device)

            # forward pass and triplet loss
            anchor_emb, positive_emb = self.model(anchors, positives)
            loss = self.criterion(anchor_emb, positive_emb, labels)
            cur_losses.append(loss.item())

        self.valid_loss += cur_losses
        self.model.train()
        return cur_losses

    def fit(self):
        """
        Train/validation loop.

        Runs ``self.n_iters`` optimisation steps, cycling over the training
        loader as often as needed. Validation runs every 250 iterations and a
        checkpoint is saved every ``self.save_freq`` iterations (both include
        iteration 0).
        """
        self.iter_ = 0
        self.model.train()
        progress_bar = tqdm(total=self.n_iters, initial=0)

        for _ in range(self.n_iters):
            for (anchors, positives, _), (labels, _, _) in self.train_loader:
                # setting inputs to the trainer's device
                anchors = anchors.to(self.device)
                positives = positives.to(self.device)
                labels = labels.to(self.device)

                # forward pass and triplet loss
                anchor_emb, positive_emb = self.model(anchors, positives)
                loss = self.criterion(anchor_emb, positive_emb, labels)
                self.train_loss.append(loss.item())

                # optimization
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                # updating progress bar (advance it, not only relabel it)
                progress_bar.set_description(f"Train Iter {self.iter_}: Loss={round(loss.item(),5)})")
                progress_bar.update(1)

                # doing some validation every once in a while
                if self.iter_ % 250 == 0:
                    cur_losses = self.valid_step()
                    print(f"Valid loss @ iteration {self.iter_}: Loss={np.mean(cur_losses)}")

                # saving model every n-th iter
                if self.iter_ % self.save_freq == 0:
                    stats = {
                        "train_loss": self.train_loss,
                        "valid_loss": self.valid_loss
                    }
                    utils_v2.save_model(self.model, self.optimizer, self.iter_, stats, margin=self.margin)

                self.iter_ = self.iter_ + 1
                if self.iter_ >= self.n_iters:
                    break
            if self.iter_ >= self.n_iters:
                break

        progress_bar.close()
        return
# Pick the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Semi-hard mining loss, ImageNet-initialised siamese model and its trainer.
criterion = SemiHardTripletLoss(margin=0.2)
model = SiameseModel2(use_pretrained=True).to(device)
trainer = Trainer2(
    model=model,
    criterion=criterion,
    train_loader=train_loader,
    valid_loader=valid_loader,
    n_iters=10001,
    save_freq=500,
    save_name='rand_weights',
)
# train_loader_semihard = torch.utils.data.DataLoader(dataset=tri_train_dataset, batch_size=128, shuffle=True)
/home/user/lschulze/anaconda3/envs/lab/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. warnings.warn( /home/user/lschulze/anaconda3/envs/lab/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet18_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet18_Weights.DEFAULT` to get the most up-to-date weights. warnings.warn(msg)
# Run training; Trainer2.fit prints the validation loss every 250 iterations.
trainer.fit()
Valid loss @ iteration 0: Loss=0.0586702645652824
Valid loss @ iteration 250: Loss=0.012883161505063375
Valid loss @ iteration 500: Loss=0.014203465854128202
Valid loss @ iteration 750: Loss=0.014916231855750084
Valid loss @ iteration 1000: Loss=0.0133448691210813
Valid loss @ iteration 1250: Loss=0.013322920641965337
Valid loss @ iteration 1500: Loss=0.015133819025423791
Valid loss @ iteration 1750: Loss=0.01493771094828844
Valid loss @ iteration 2000: Loss=0.014930905981196297
Valid loss @ iteration 2250: Loss=0.01367328522933854
Valid loss @ iteration 2500: Loss=0.011105063940501876
Valid loss @ iteration 2750: Loss=0.011715121877690157
Valid loss @ iteration 3000: Loss=0.012865047798388533
Valid loss @ iteration 3250: Loss=0.013354246608085103
Valid loss @ iteration 3500: Loss=0.014355967959596051
Valid loss @ iteration 3750: Loss=0.013391844980004761
Valid loss @ iteration 4000: Loss=0.013905214456220468
Valid loss @ iteration 4250: Loss=0.012420321711235575
Valid loss @ iteration 4500: Loss=0.015995711760802403
Valid loss @ iteration 4750: Loss=0.016530296972228423
Valid loss @ iteration 5000: Loss=0.012308364371872611
Valid loss @ iteration 5250: Loss=0.012671786670883497
Valid loss @ iteration 5500: Loss=0.013471152529948287
Valid loss @ iteration 5750: Loss=0.013406235931648148
Valid loss @ iteration 6000: Loss=0.016179928556084633
Valid loss @ iteration 6250: Loss=0.014562521440287432
Valid loss @ iteration 6500: Loss=0.014507339439458318
Valid loss @ iteration 6750: Loss=0.015391649471388923
Valid loss @ iteration 7000: Loss=0.015577871766355302
Valid loss @ iteration 7250: Loss=0.016578892452849284
Valid loss @ iteration 7500: Loss=0.01477938186791208
Valid loss @ iteration 7750: Loss=0.013535071371330155
Valid loss @ iteration 8000: Loss=0.011874745186004374
Valid loss @ iteration 8250: Loss=0.014340193424787786
Valid loss @ iteration 8500: Loss=0.014057943286995092
Valid loss @ iteration 8750: Loss=0.021744205719894834
Valid loss @ iteration 9000: Loss=0.013902556565072801
Valid loss @ iteration 9250: Loss=0.017108396937449772
Valid loss @ iteration 9500: Loss=0.013138232856161065
Valid loss @ iteration 9750: Loss=0.017129433444804616
Train Iter 10000: Loss=0.00079): 0%| | 0/10001 [2:34:11<?, ?it/s]
Valid loss @ iteration 10000: Loss=0.013132999030252298
# Reload the checkpoint together with its recorded loss statistics and plot
# the training (first 10000 entries) and validation curves.
checkpoint_path = "/home/user/lschulze/projects/CudaVisionWS23/Assignment7/checkpoints/checkpoint_epoch_10000_margin_0.2.pth"
model, _, _, stats = utils_v2.load_model(
    trainer.model,
    trainer.optimizer,
    savepath=checkpoint_path,
)
train_curve = stats['train_loss'][:10000]
valid_curve = stats['valid_loss']
utils_v2.visualize_progress(train_curve, valid_curve, start=0)
The loss plots look as expected. In the validation loss, however, we observe a slight increase over time. This might be due to overfitting, but it might also be due to variance in the loss. Since it did not affect our results much, we did not investigate this issue further. Nevertheless, we investigated the results at lower validation loss using early stopping.
# Restore the checkpoint file for iteration 9500 and switch to eval mode.
# Use the trainer's device instead of a hard-coded "cuda" so the cell also
# runs on hosts without a GPU.
device = trainer.device
# model = SiameseModel(use_pretrained = True) # w/pretrained was better
model = model.to(device)
trainer.model = model
model, _, _, _ = utils_v2.load_model(
    trainer.model,
    trainer.optimizer,
    savepath="/home/user/lschulze/projects/CudaVisionWS23/Assignment7/checkpoints/checkpoint_epoch_9500_margin_0.2.pth",
)
model = model.eval()
# Restore the checkpoint file for iteration 2500 and switch to eval mode.
# Use the trainer's device instead of a hard-coded "cuda" so the cell also
# runs on hosts without a GPU.
device = trainer.device
# model = SiameseModel(use_pretrained = True) # w/pretrained was better
model = model.to(device)
trainer.model = model
model, _, _, _ = utils_v2.load_model(
    trainer.model,
    trainer.optimizer,
    savepath="/home/user/lschulze/projects/CudaVisionWS23/Assignment7/checkpoints/checkpoint_epoch_2500_margin_0.2.pth",
)
model = model.eval()
# Reload the complete LFW test split and gather the label of every sample.
test_dataset = datasets.LFWPeople(
    root='./data',
    transform=LFWPeople_tf,
    split="test",
    download=True,
)
labels = torch.Tensor([target for _, target in test_dataset])
# NOTE(review): get_same_labels is defined elsewhere and takes no arguments;
# presumably it reads the global `labels` built above - confirm before reordering.
same_labels = get_same_labels()
Files already downloaded and verified 11
# Collect, for every test sample, its label, its embedding and the flattened
# input image (the latter serves as a baseline for the projections).
visualization_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size)
embs = list()
labels = list()
imgs_flat = list()
with torch.no_grad():
    for data, label in visualization_loader:
        data = data.to(device)
        data_embedding = model.forward_one(data)
        labels.append(label)
        embs.append(data_embedding.cpu().flatten(1))
        imgs_flat.append(data.cpu().flatten(1))
labels = np.concatenate(labels)
embs = np.concatenate(embs)
imgs_flat = np.concatenate(imgs_flat)

# Every label that is not in same_labels is collapsed into the bucket 0, so 0
# itself must not be one of the selected labels.
assert 0 not in same_labels, "0 in same_labels change the label of the undefined cluster"
filtered_labels = [int(lbl) if lbl in same_labels else 0 for lbl in labels]

# 2-D PCA of the raw pixels and of the learned embeddings.
pca_imgs = PCA(n_components=2).fit_transform(imgs_flat)
pca_embs = PCA(n_components=2).fit_transform(embs)

N = 5000
# The bare 'seaborn' style name is deprecated since Matplotlib 3.6; the same
# style sheet is available as 'seaborn-v0_8'.
plt.style.use('seaborn-v0_8')
fig, ax = plt.subplots(1, 2, figsize=(26, 8))
display_projections(pca_imgs[:N], filtered_labels[:N], ax=ax[0])
ax[0].set_title("PCA Proj. of Images")
display_projections(pca_embs[:N], filtered_labels[:N], ax=ax[1])
ax[1].set_title("PCA Proj. of Embeddings")
plt.show()
/tmp/ipykernel_3962321/1407309200.py:5: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead.
plt.style.use('seaborn')
# t-SNE is only fitted on the first N samples.
N = 2000
tsne_imgs = TSNE(n_components=2).fit_transform(imgs_flat[:N])
tsne_embs = TSNE(n_components=2).fit_transform(embs[:N])

# Left panel: raw images, right panel: learned embeddings.
fig, ax = plt.subplots(1, 2, figsize=(26, 8))
panels = (
    (tsne_imgs, "T-SNE Proj. of Images"),
    (tsne_embs, "T-SNE Proj. of Embeddings"),
)
for axis, (projection, title) in zip(ax, panels):
    display_projections(projection[:N], filtered_labels[:N], ax=axis)
    axis.set_title(title)
plt.show()
We consider the final plots below to be the better result, because embeddings of the same class stay closer together, i.e. have a lower distance to each other. We can infer this especially from our t-SNE plot, which places points close together if they are close in the original space. Therefore, a k-NN search in the vicinity of a person's embedding would most probably return almost all members of the same class. One could perhaps argue that the PCA of the "earlier" plot differentiates a bit better between classes. This, however, comes at the cost of higher variance between points of the same class.
# 2-D PCA of the raw pixels and of the learned embeddings.
pca_imgs = PCA(n_components=2).fit_transform(imgs_flat)
pca_embs = PCA(n_components=2).fit_transform(embs)

N = 5000
# The bare 'seaborn' style name is deprecated since Matplotlib 3.6; the same
# style sheet is available as 'seaborn-v0_8'.
plt.style.use('seaborn-v0_8')
fig, ax = plt.subplots(1, 2, figsize=(26, 8))
display_projections(pca_imgs[:N], filtered_labels[:N], ax=ax[0])
ax[0].set_title("PCA Proj. of Images")
display_projections(pca_embs[:N], filtered_labels[:N], ax=ax[1])
ax[1].set_title("PCA Proj. of Embeddings")
plt.show()
/tmp/ipykernel_3962321/1407309200.py:5: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead.
plt.style.use('seaborn')
# t-SNE is only fitted on the first N samples.
N = 2000
tsne_imgs = TSNE(n_components=2).fit_transform(imgs_flat[:N])
tsne_embs = TSNE(n_components=2).fit_transform(embs[:N])

# Left panel: raw images, right panel: learned embeddings.
fig, ax = plt.subplots(1, 2, figsize=(26, 8))
panels = (
    (tsne_imgs, "T-SNE Proj. of Images"),
    (tsne_embs, "T-SNE Proj. of Embeddings"),
)
for axis, (projection, title) in zip(ax, panels):
    display_projections(projection[:N], filtered_labels[:N], ax=axis)
    axis.set_title(title)
plt.show()
Comparing the first two models by their loss, we observe that the pretrained model performs better than the one with random weights. This is expected, since pretraining already gives the model a reasonably good initial embedding of the input images. Looking at the embeddings, we also see better performance from the pretrained model: its PCA and t-SNE projections separate our chosen classes better than those of the non-pretrained model.
The best performance, however, is achieved by the model trained with semi-hard sampling. Since we used different loss functions, we cannot properly compare the models quantitatively. Looking at the difference in embedding quality, we note that this might not even be necessary. The t-SNE projection clusters almost all points of the same class together, and the number of outliers has been reduced significantly. As already indicated, it is valid to infer from this that embeddings of the same class have a very low distance to each other. The PCA projections also look much cleaner and better separated than those of the models not using semi-hard sampling.
Nevertheless, we believe the model can be improved further. A big problem throughout testing was running out of memory on our CUDA machines; we were lucky to be able to train our model with a batch size of 128. The paper, however, suggests that this is not necessarily the best option, since the authors used much larger batch sizes after conducting in-depth research. Larger batch sizes would also give us a more diverse pool of negatives in the semi-hard sampling phase. For these reasons we believe that further improvement is certainly possible.